{ "cells": [ { "cell_type": "markdown", "id": "fe1ee56b-fcbd-4ee7-be59-04d8799921bf", "metadata": {}, "source": [ "# Pyteomics" ] }, { "cell_type": "raw", "id": "268917e0-6da5-4065-8c8f-b7e3710c6e80", "metadata": { "editable": true, "raw_mimetype": "text/html", "slideshow": { "slide_type": "" }, "tags": [] }, "source": [ "\n", " \"Launch\n", "\n", "

" ] }, { "cell_type": "markdown", "id": "ef467988-ea67-4119-8519-57ec9c70784f", "metadata": {}, "source": [ "## Install pyteomics" ] },
{ "cell_type": "code", "execution_count": null, "id": "57cafd99-f3fb-455a-b2fa-288e6bccf374", "metadata": {}, "outputs": [], "source": [
 "# Use %pip (not !pip) so the packages are installed into the running kernel's environment\n",
 "%pip install --quiet pyteomics lxml" ] },
{ "cell_type": "code", "execution_count": null, "id": "6f116382-7fea-4435-9717-b742db4e66c6", "metadata": {}, "outputs": [], "source": [
 "import pandas as pd\n",
 "import requests\n",
 "from pyteomics import mzml" ] },
{ "cell_type": "markdown", "id": "246fbbb0-8418-476e-91ad-ca5dbeb8d87f", "metadata": {}, "source": [ "## Download Data" ] },
{ "cell_type": "code", "execution_count": null, "id": "1f193209-cd41-4494-a4d4-aaeef0a132d7", "metadata": {}, "outputs": [], "source": [
 "url = 'https://raw.githubusercontent.com/levitsky/pyteomics/master/tests/test.mzML'\n",
 "file_name = 'test.mzML'\n",
 "\n",
 "# Send a GET request to the URL; the timeout keeps the cell from hanging forever\n",
 "response = requests.get(url, timeout=30)\n",
 "# Raise requests.HTTPError on a 4xx/5xx answer instead of saving an error page as test.mzML\n",
 "response.raise_for_status()\n",
 "\n",
 "# Save the raw bytes of the response to a file\n",
 "with open(file_name, 'wb') as file:\n",
 "    file.write(response.content)\n",
 "\n",
 "print(f'File {file_name} downloaded successfully!')" ] },
{ "cell_type": "markdown", "id": "ab5fc65d-90fb-41a6-a01e-28cf934d51de", "metadata": {}, "source": [ "## Load `.mzML` file and convert to `pd.DataFrame`" ] },
{ "cell_type": "code", "execution_count": 5, "id": "7a0d5e8f-7fbe-4f23-af41-dd652e1c6130", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ms_levelrtmzint
010.004935200.0001880.0
010.004935200.000430.0
010.004935200.0006730.0
010.004935200.0009150.0
010.004935202.6058290.0
...............
110.0059351999.9130860.0
110.0059351999.9372560.0
110.0059351999.9615480.0
110.0059351999.9857180.0
110.0059352000.0098880.0
\n", "

39828 rows × 4 columns

\n", "
" ], "text/plain": [ " ms_level rt mz int\n", "0 1 0.004935 200.000188 0.0\n", "0 1 0.004935 200.00043 0.0\n", "0 1 0.004935 200.000673 0.0\n", "0 1 0.004935 200.000915 0.0\n", "0 1 0.004935 202.605829 0.0\n", ".. ... ... ... ...\n", "1 1 0.005935 1999.913086 0.0\n", "1 1 0.005935 1999.937256 0.0\n", "1 1 0.005935 1999.961548 0.0\n", "1 1 0.005935 1999.985718 0.0\n", "1 1 0.005935 2000.009888 0.0\n", "\n", "[39828 rows x 4 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "input_file = \"./test.mzML\"\n",
 "\n",
 "# Collect one entry per spectrum. With decode_binary=False the arrays are not\n",
 "# decoded on read, so each one is decoded explicitly with .decode() below.\n",
 "ms_level, rt, mz, intens = [], [], [], []\n",
 "with mzml.MzML(input_file, decode_binary=False) as reader:\n",
 "    for scan in reader:\n",
 "        ms_level.append(scan['ms level'])\n",
 "        rt.append(scan['scanList']['scan'][0]['scan start time'])\n",
 "        mz.append(scan['m/z array'].decode())\n",
 "        intens.append(scan['intensity array'].decode())\n",
 "\n",
 "df = (\n",
 "    pd.DataFrame({'ms_level': ms_level, 'rt': rt, 'mz': mz, 'int': intens})\n",
 "    # Explode the per-spectrum mz/int arrays so that each row is a single peak.\n",
 "    # The index is kept, so it repeats once per peak of the same spectrum.\n",
 "    .explode(['mz', 'int'])\n",
 "    # explode() returns the exploded columns as object dtype; cast them back to\n",
 "    # floats so numeric operations and plotting work as expected.\n",
 "    .astype({'mz': 'float64', 'int': 'float64'})\n",
 ")\n",
 "df" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.7" } }, "nbformat": 4, "nbformat_minor": 5 }